# -*- coding: utf-8 -*-
# @Time : 2021/9/19 0:13
# @Author : Kuns (Huang Yuxuan)
# @Project : PythonSpider
# @File : Work3_Shiyan7.py
# @Software : PyCharm

import requests
import re
import csv
import time

# Page that is scraped: Douban's movie chart.
url = 'https://movie.douban.com/chart'

# HTTP headers sent with the request.
# NOTE(review): the Cookie value is a hard-coded browser session; it has
# probably expired by now -- confirm whether the request needs it at all.
headers = {
    # Individual cookie pairs, joined back into one "k=v; k=v" header value.
    # Some names repeat on purpose (the value is a verbatim browser capture).
    'Cookie': '; '.join((
        'bid=jrZxkRSXWps',
        '__utma=30149280.921693842.1625506714.1631072732.1631981648.3',
        '__utmz=30149280.1631072732.2.2.utmcsr=baidu|utmccn=(organic)|utmcmd=organic',
        '__gads=ID=2b640d0058593a00-22e28b1e2fca00a8:T=1625506715:RT=1625506715:S=ALNI_MZ1SqApXmZw3pz-CFW4vB10j2JFaw',
        'douban-fav-remind=1',
        '_pk_id.100001.4cf6=89e4c99b61725384.1631981648.1.1631981648.1631981648.',
        '_pk_ses.100001.4cf6=*',
        'ap_v=0,6.0',
        '__utmb=30149280.0.10.1631981648',
        '__utmc=30149280',
        '__utma=223695111.1462871077.1631981648.1631981648.1631981648.1',
        '__utmb=223695111.0.10.1631981648',
        '__utmc=223695111',
        '__utmz=223695111.1631981648.1.1.utmcsr=(direct)|utmccn=(direct)|utmcmd=(none)',
    )),
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:92.0) Gecko/20100101 Firefox/92.0',
}

# Poster link of one chart entry: captures (subject URL, movie title).
# The subject id is matched with \d+ so ids of any length are accepted.
LINK_RE = re.compile(
    r'<a class="nbg" href="(https://movie\.douban\.com/subject/\d+/)"\s+title="(.*?)">'
)
# Rating such as "8.7".
RATING_RE = re.compile(r'<span class="rating_nums">(\d+\.\d)</span>')
# Number of people who rated the movie, taken from "(N人评价)".
VOTES_RE = re.compile(r'<span class="pl">\((\d.*?)人评价\)</span>')

# Column titles written as the first CSV row (link, title, rating, votes).
CSV_HEADER = ['电影链接', '电影名', '豆瓣评分', '评价人数']
# Destination of the generated CSV file.
OUTPUT_PATH = r'F:\python爬虫\test3\douban.csv'


def fetch_chart(chart_url, request_headers, timeout=10):
    """Download the chart page and return its decoded HTML text.

    Args:
        chart_url: Address of the page to download.
        request_headers: HTTP headers sent with the request.
        timeout: Seconds to wait for the server before giving up, so an
            unresponsive server cannot hang the script forever.

    Returns:
        The response body decoded with the encoding detected from its content.

    Raises:
        requests.RequestException: On network failure, timeout, or an HTTP
            error status (instead of silently parsing an error page).
    """
    response = requests.get(chart_url, headers=request_headers, timeout=timeout)
    response.raise_for_status()
    response.encoding = response.apparent_encoding
    return response.text


def parse_chart(html):
    """Extract one ``[link, title, rating, votes]`` row per movie from *html*.

    Rating and vote count are searched only in the markup between a movie's
    poster link and the next movie's poster link, so a movie without a rating
    gets empty strings instead of shifting every following value onto the
    wrong movie.
    NOTE(review): this assumes each movie's rating/vote spans come after its
    poster link in the page markup -- confirm against the live page.

    Args:
        html: HTML text of the chart page.

    Returns:
        A list of 4-item lists of strings; empty when no movie link is found.
    """
    links = list(LINK_RE.finditer(html))
    # Each movie's segment ends where the next poster link starts.
    segment_ends = [link.start() for link in links[1:]] + [len(html)]

    rows = []
    for link, segment_end in zip(links, segment_ends):
        rating = RATING_RE.search(html, link.end(), segment_end)
        votes = VOTES_RE.search(html, link.end(), segment_end)
        rows.append([
            link.group(1),
            link.group(2),
            rating.group(1) if rating else '',
            votes.group(1) if votes else '',
        ])
    return rows


def save_rows(rows, path=OUTPUT_PATH, header=CSV_HEADER):
    """Write *header* followed by *rows* to a CSV file at *path*.

    The file is written as UTF-8 with a BOM so that any title can be encoded
    regardless of the system locale and spreadsheet programs still detect the
    encoding of the Chinese text.
    """
    with open(path, 'w', newline='', encoding='utf-8-sig') as f:
        writer = csv.writer(f)
        writer.writerow(header)
        writer.writerows(rows)


def main():
    """Scrape the chart, echo every row to stdout and save all rows as CSV."""
    rows = parse_chart(fetch_chart(url, headers))
    for row in rows:
        # Same console layout as before: space-separated fields, a trailing
        # space, then an empty line between movies.
        print(*row, end=' \n\n')
    save_rows(rows)
    print('finish')


if __name__ == '__main__':
    main()